# Setup: clear the workspace, attach packages, and read the input data.
# rm(list = ls()) removes every visible object in the current environment, so
# anything defined before this script runs is discarded. Attached packages and
# options are not reset by it.
rm(list = ls())
library(tidyverse)
library(estimatr)
library(texreg)
library(ggthemes)

# Read the input CSV; the path is relative to the current working directory.
df = read_csv("data/ukraine_final.csv")





# DATA PREP ----------------------------------------------------------------
# Control variables that are converted to factors at the end of the pipeline.
cols <- c("ethnicity", "language", "religion", "sex", "region", "job")

# Ordered answer scales. A response is scored by its position in the scale,
# starting at 0; any value not listed in the scale (including NA) is scored NA.
favorability_scale <- c("Very Unfavorable", "Unfavorable", "Neutral",
                        "Favorable", "Very Favorable")
likelihood_scale <- c("Definitely not", "Probably not", "Probably yes",
                      "Definitely yes")
approval_scale <- c("Strongly Disapprove", "Disapprove", "Neutral",
                    "Approve", "Strongly Approve")
education_scale <- c(
  "Primary (less than 7 years) complete or incomplete",
  "Incomplete secondary (less than 10 years)",
  "PTU without a secondary education (vocational elementary education)",
  "General secondary",
  "PTU with a secondary education (vocational secondary education)",
  "Special technical",
  "Incomplete higher",
  "Higher (diploma of bachelor, specialist or master degree)"
)
# NOTE(review): "2000-3000 hryvina" is spelled differently from the other
# brackets. It is kept verbatim because it has to match the raw data exactly;
# if the data say "hryvnia", this bracket is silently scored NA -- confirm.
income_scale <- c(
  "Less than 1000 hryvnia", "1000-2000 hryvnia", "2000-3000 hryvina",
  "3000-4000 hryvnia", "4000-5000 hryvnia", "5000-6000 hryvnia",
  "6000-7000 hryvnia", "7000-8000 hryvnia", "More than 8000 hryvnia"
)

# Scores each element of `x` as its zero-based position in `scale`.
# Returns a numeric vector the same length as `x`; unmatched values become NA.
score_scale <- function(x, scale) {
  match(x, scale) - 1
}

df <- df %>%
  mutate(
    # Favorability items (0 = Very Unfavorable ... 4 = Very Favorable).
    pcr_il = score_scale(pcr_il, favorability_scale),
    pcr_ul = score_scale(pcr_ul, favorability_scale),
    pcr_par = score_scale(pcr_par, favorability_scale),

    # Follow-up items (0 = Definitely not ... 3 = Definitely yes).
    pcr_il_p = score_scale(pcr_il_p, likelihood_scale),
    pcr_il_j = score_scale(pcr_il_j, likelihood_scale),
    pcr_ul_p = score_scale(pcr_ul_p, likelihood_scale),
    pcr_ul_j = score_scale(pcr_ul_j, likelihood_scale),
    pcr_par_p = score_scale(pcr_par_p, likelihood_scale),
    pcr_par_j = score_scale(pcr_par_j, likelihood_scale),

    # Approval item (0 = Strongly Disapprove ... 4 = Strongly Approve).
    pcr_mod = score_scale(pcr_mod, approval_scale),

    # Shorten the region answers; values not listed here are left unchanged.
    region = recode(region, "The territory currently known as the Donetsk and Luhansk People Republics" = "LNR/DNR",
                    "The parts of Luhansk or Donetsk Oblast that are currently controlled by the Ukrainian government." = "East",
                    "Any other territory of Ukraine." = "West"),

    educ = score_scale(educ, education_scale),
    income = score_scale(income, income_scale),

    age = as.numeric(age),
    # %in% never returns NA, so a missing ethnicity is coded 0 here.
    nationalist = ifelse(ethnicity %in% c("I feel more Ukrainian than Russian", "I feel only Ukrainian"), 1, 0)
  ) %>%
  # funs() is deprecated (dplyr 0.8.0); pass the function itself instead.
  mutate_at(.vars = cols, .funs = factor)

# Collapsed versions of income, religion, job and education.
# Income is binned at the 0th/33rd/67th/100th percentiles of the income code.
income_breaks <- quantile(df$income, probs = c(0, .33, .67, 1), na.rm = TRUE)
income_labels <- c("Up ot 4000 hryvnia", "Up ot 7000 hryvnia", "Over 7000 hryvnia")

df <- df %>%
  mutate(
    income2 = cut(income, breaks = income_breaks, labels = income_labels,
                  include.lowest = TRUE),

    # Pool the smaller denominations into "Other"; unlisted levels are kept.
    religion2 = recode(religion,
                       "Greek Catholic Church" = "Other",
                       "Muslim (Islam)" = "Other",
                       "Protestant" = "Other",
                       "Roman Catholic" = "Other",
                       "Ukrainian Autocephalous Orthodox Church" = "Other",
                       "Religious but do not belong to a certain religion or church" = "Religious but no church"),

    # Shorten / pool job categories; unlisted levels are kept.
    job2 = recode(job,
                  "Entrepreneur, farmer" = "Other",
                  "Military servant" = "Other",
                  "Pension (because of age or disability)" = "Pension",
                  "Professional (with higher education)" = "Professional",
                  "Self employed businesswomen/men" = "Self-employed",
                  "Servant (without higher education)" = "Servant (no higher ed.)"),

    # 1 when educ is the top code (7), 0 when lower, NA when educ is missing.
    university = ifelse(educ == 7, 1,
                        ifelse(educ < 7, 0, NA))
  )



# Controls ----------------------------------------------------------------
# Answer options of the ethnicity question, grouped by self-identification.
eth_russian <- c("I feel more Russian than Ukrainian", "I feel only Russian")
eth_ukrainian <- c("I feel more Ukrainian than Russian", "I feel only Ukrainian")
eth_equal <- "I feel equally Ukrainian and Russian"
eth_neither <- "I feel neither Russian nor Ukrainian"
eth_known <- c(eth_russian, eth_ukrainian, eth_equal, eth_neither)

# 1 if `x` is one of `target`, 0 if it is any other value in `known`,
# NA otherwise (e.g. a missing answer).
flag_among_known <- function(x, target, known) {
  ifelse(x %in% target, 1,
         ifelse(x %in% known, 0, NA))
}

df <- df %>%
  mutate(
    russian_eth = flag_among_known(ethnicity, eth_russian, eth_known),
    dfr_eth = flag_among_known(ethnicity, eth_ukrainian, eth_known),
    both_eth = flag_among_known(ethnicity, eth_equal, eth_known),

    # The remaining indicators use a plain %in% test, so missing values are 0.
    russian_lang = ifelse(
      language %in% c("I speak both, but mostly Russian", "I speak Russian"),
      1, 0
    ),
    employed = ifelse(
      job2 %in% c("Professional", "Self-employed", "Worker, farmworker"),
      1, 0
    ),
    orthodox_rlgn = ifelse(
      religion2 %in% c("Ukrainian Orthodox Church",
                       "Ukrainian Orthodox Church (Moscow Patriarchate)",
                       "Ukrainian Orthodox Church (Kyiv Patriarchate)"),
      1, 0
    ),
    other_rlgn = ifelse(
      religion2 %in% c("Other", "Religious but no church",
                       "Unbeliever, atheist"),
      1, 0
    )
  )



# Prep for hypotheses ------------------------------------------------------
# In-group vs. out-group.
# ingroup = 1 when the respondent's pcr_mod code is paired with the matching
# treatment in t_pcr (codes 3/4 with "separatists", codes 0/1 with "govt");
# outgroup = 1 for the crossed pairings. Per the original author's note, both
# sides are pooled: in-group means supporters of either side who received a
# treatment saying their own group was responsible.
df <- df %>%
  mutate(
    ingroup = ifelse((pcr_mod %in% c(3, 4) & t_pcr == "separatists") |
                       (pcr_mod %in% c(0, 1) & t_pcr == "govt"), 1, 0),
    outgroup = ifelse((pcr_mod %in% c(3, 4) & t_pcr == "govt") |
                        (pcr_mod %in% c(0, 1) & t_pcr == "separatists"), 1, 0)
  )

# Three-level treatment factor; rows that are neither in- nor out-group are
# labelled "control", which is also the reference level.
df <- df %>%
  mutate(
    in_out = ifelse(outgroup == 1, "outgroup",
                    ifelse(ingroup == 1, "ingroup", "control")),
    in_out = factor(in_out, levels = c("control", "ingroup", "outgroup"))
  )

# Revenge: Yes/No answers in progov_fam and sep_fam coded 1/0.
# The sep_kill recode previously spelled the argument `.defatul`, so the
# default was never set; it now matches the gov_kill recode.
df <- df %>%
  mutate(
    gov_kill = recode(progov_fam, "Yes" = 1, "No" = 0, .default = NA_real_),
    sep_kill = recode(sep_fam, "Yes" = 1, "No" = 0, .default = NA_real_),
    # 1 if either indicator is 1, 0 only if both are 0, NA otherwise.
    fam_kill = ifelse(gov_kill == 1 | sep_kill == 1, 1,
                      ifelse(gov_kill == 0 & sep_kill == 0, 0, NA))
  )

# Drop respondents with pcr_mod == 2 (the "Neutral" code). filter() also drops
# rows where the condition is NA, i.e. rows with a missing pcr_mod.
df <- df %>% filter(pcr_mod != 2)


# Binary covariates for the balance table.
df <- df %>%
  mutate(
    # 1 for the "LNR/DNR" region, 0 for everything else (including NA).
    region_sep = as.numeric(region %in% "LNR/DNR"),
    # 1 for "Female", 0 for everything else (including NA).
    female = as.numeric(sex %in% "Female"),
    # 0 for the lowest income bin, 1 for the two upper bins, NA if unbinned.
    income3 = case_when(
      income2 %in% "Up ot 4000 hryvnia" ~ 0,
      income2 %in% c("Up ot 7000 hryvnia", "Over 7000 hryvnia") ~ 1,
      TRUE ~ NA_real_
    )
  )
library(arsenal)

# Balance F-tests ------------------------------------------------------------
# For each covariate, regress it on the treatment factor in_out and keep the
# overall F statistic with its p-value, formatted as "F (p)" with both numbers
# rounded to two decimals.

# Returns the "F (p)" label for lm(<outcome> ~ in_out) fitted on `data`.
#   outcome: name of the covariate column, as a string.
#   data:    data frame containing `outcome` and `in_out`.
balance_f_label <- function(outcome, data) {
  fit <- summary(lm(reformulate("in_out", response = outcome), data = data))
  f <- fit$fstatistic  # c(value, numdf, dendf)
  p_value <- pf(f[1], f[2], f[3], lower.tail = FALSE)
  paste0(round(f[1], digits = 2), " (", round(p_value, digits = 2), ")")
}

# These object names are the ones used later when the F-stat column of the
# balance table is assembled, so they are kept as-is.
region <- balance_f_label("region_sep", df)
age <- balance_f_label("age", df)
female <- balance_f_label("female", df)
university <- balance_f_label("university", df)
income <- balance_f_label("income3", df)
nationalist <- balance_f_label("nationalist", df)
employed <- balance_f_label("employed", df)
russian_eth <- balance_f_label("russian_eth", df)
russian_lang <- balance_f_label("russian_lang", df)
orthodox_rlgn <- balance_f_label("orthodox_rlgn", df)



# Balance table --------------------------------------------------------------
# Descriptive statistics of each covariate by treatment group (in_out), with
# the F-test labels appended as an extra column, written out as LaTeX.
covariate_labels <- list(
  region_sep = "Separatist region",
  age = "Age",
  female = "Female",
  university = "University educated",
  income3 = "Income (over 4000 hryvnia)",
  nationalist = "Nationalism (feeling more Ukrainian)",
  employed = "Employed",
  russian_eth = "Russian ethnic",
  russian_lang = "Mostly speaks Russian",
  orthodox_rlgn = "Ukrainian Orthodox Church (incl. Moscow Patriarchate)"
)

# NOTE(review): the original call had `pfootnote = T, width = 5` nested inside
# the labelTranslations list (misplaced parenthesis), so summary() never
# received them as arguments. They are dropped here rather than activated:
# presumably they were ignored as labels for non-existent variables, and
# passing width to summary() may re-wrap the label column and change the row
# layout that the F-stat column below relies on -- confirm before adding them.
tab <- tableby(in_out ~ region_sep + age + female + university + income3 +
                 nationalist + employed + russian_eth + russian_lang +
                 orthodox_rlgn,
               data = df, total = FALSE, test = FALSE, digits = 2L,
               numeric.stats = c("Nmiss2", "meansd", "range")) %>%
  summary(text = "latex", labelTranslations = covariate_labels)

# Inspect the row labels of the summary table (auto-printed at top level).
tab$object$in_out$variable

# One F-stat label per covariate, in the same order as the tableby formula.
# Each label is followed by three empty strings, i.e. the layout assumes four
# table rows per covariate.
f_labels <- c(region, age, female, university, income, nationalist,
              employed, russian_eth, russian_lang, orthodox_rlgn)
tab$object$in_out$`F-Stat (p.value)` <- c(rbind(f_labels, "", "", ""))


capture.output(tab, file = "tabs/balance_table.tex")


